package youke.home.textRank;

import org.ansj.domain.Term;

import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Static helpers that strip stop words and punctuation tokens from a list of
 * segmented {@link Term}s, in place.
 *
 * <p>The default stop words are loaded when the class is initialized, so
 * {@link #filter(List)} works without any prior setup call.
 */
public class StopWord {
    /** Stop words matched against {@code Term.getName()} by exact string equality. */
    private static final Set<String> stopWords = new HashSet<>();

    /**
     * Matches a token that consists of exactly one punctuation or whitespace
     * character. Compiled once so the regex is not recompiled for every term.
     */
    private static final Pattern PUNCTUATION = Pattern.compile("[，,。:：？?！!；;【】\\s]");

    static {
        stopWords.add("的");
        stopWords.add("和");
        stopWords.add("是");
        stopWords.add("在");
        stopWords.add("了");
        stopWords.add("对");
        stopWords.add("与");
        // Add more stop words here as needed.
    }

    /**
     * Retained for backward compatibility with callers that invoke it before
     * filtering. The default stop words are already loaded by the static
     * initializer, so this call has no further effect.
     */
    public static void init() {
        // Intentionally empty: see the static initializer above.
    }

    /**
     * Removes, in place, every term whose name is a stop word.
     *
     * @param terms the list to filter; must support {@code Iterator.remove()}.
     *              A {@code null} list is ignored, and {@code null} elements
     *              are left in place.
     */
    public static void filter(List<Term> terms) {
        if (terms == null) {
            return;
        }
        Iterator<Term> iter = terms.iterator();
        while (iter.hasNext()) {
            Term term = iter.next();
            if (term != null && stopWords.contains(term.getName())) {
                iter.remove();
            }
        }
    }

    /**
     * Removes, in place, every term whose name is a single punctuation or
     * whitespace character.
     *
     * @param terms the list to filter; must support {@code Iterator.remove()}.
     *              A {@code null} list is ignored, and {@code null} elements
     *              are left in place.
     */
    public static void cleanerPAS(List<Term> terms) {
        if (terms == null) {
            return;
        }
        Iterator<Term> iter = terms.iterator();
        while (iter.hasNext()) {
            Term term = iter.next();
            if (term != null && isPunctuation(term.getName())) {
                iter.remove();
            }
        }
    }

    /**
     * Tells whether {@code word} is exactly one of the recognized punctuation
     * marks or a single whitespace character.
     *
     * @param word the token text; {@code null} is treated as not punctuation
     * @return {@code true} if the whole token matches the punctuation pattern
     */
    private static boolean isPunctuation(String word) {
        return word != null && PUNCTUATION.matcher(word).matches();
    }
}

